	
********************************************************************************
** 	TITLE: b7_4_sag_roa_crim_repulled.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE:  This file cleans the repulled Saginaw criminal cases (both pre-COVID
**	and post-COVID criminal cases) whose information was originally pulled incorrectly.

********************************************************************************
		set sortseed 13

set more off

**Importing the case number information that came out of ROA parsing; Repulling Prior Cases
import excel "$input_data/sag_roas/roasparsed_postcrim_repull.xlsx", sheet("Sheet1") firstrow clear 

**saving tempfile for appending

tempfile post_roa

save `post_roa', replace

import excel "$input_data/sag_roas/roasparsed_precrim_repull.xlsx", sheet("Sheet1") firstrow clear 

append using `post_roa'

gen repull = 1

drop if case_id == ""

duplicates drop case_id, force


*Creating diff_roa variable
replace roa_case_id = subinstr(roa_case_id, "-", "",.)
gen diff_roa = .
replace diff_roa = 1 if roa_case_id != case_id & roa_case_id != "" 
replace diff_roa = 0 if  roa_case_id == case_id & roa_case_id != "" 

 
gen c_court =  (strpos(court, "Circuit") > 0)
replace c_court = . if court == ""

gen d_court =  (strpos(court, "District") > 0)
replace d_court = . if court == ""

gen criminal_court =  (strpos(court, "Criminal") > 0)
replace criminal_court = . if court == ""


//there are also Open, INACTIVE, Bench Warrant Issued, and Suspended
gen case_closed =  (strpos(case_status, "CLOSED") > 0)
replace case_closed = . if case_status == ""


/*
cases that messed up in parsing:
*/

gen mess =  (strpos(date_filed, "Ordinance Misdemeanor Criminal") > 0 | strpos(date_filed, "Statute Misdemeanor Criminal") > 0 )
replace mess = . if date_filed == "" & date_filed == "" 
/*
https://www.notafraidtowin.com/court-appointed-attorney-vs-retained-attorney/
Retained lawyer is private while court-appointed is a public defender
*/

gen public_defense = (strpos(attorney_type, "Court Appointed") > 0)
replace public_defense = . if attorney_type == ""



/*
Disposition, cases ending in plea deals
*/

gen plea = 0
gen sentenced = 0
forval t = 1(1)4{
	replace plea = 1 if strpos(d_disp_time_event`t', "Plea") > 0
	replace sentenced = 1 if strpos(d_disp_time_event`t', "Sentence") > 0
}

replace plea = . if roa_case_id == ""

replace sentenced = . if roa_case_id == ""

//Pre-release e-monitoring
gen pre_emonitor = 0
forval t = 1(1)270{
	replace pre_emonitor = 1 if strpos(e_event_date`t', "MONITOR ON RELEASE") > 0
}

summ diff_roa case_closed c_court d_court criminal_court mess public_defense plea sentenced pre_emonitor
summ diff_roa if c_court == 1


//Cleaning event-dates and event-comments
forval t = 1(1)270{
replace e_event_date`t' = subinstr(e_event_date`t',char(34), "",.)
replace e_event_date`t' = subinstr(e_event_date`t',"[['\\n        ", "",.)
replace e_event_date`t' = subinstr(e_event_date`t',"\\n        \\n        ', ' ', '\\n      ']]", "",.)
gen e_date`t' = substr(e_event_date`t',1,10)
gen e_dated`t' = date(e_date`t', "MDY")
gen e_month`t' = mofd(e_dated`t')
gen e_event`t' = substr(e_event_date`t',11,.)
drop e_event_date`t'

replace e_event_comment`t' = subinstr(e_event_comment`t',char(34), "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', 'Comment', '\\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"['[\'\\n          \', \'\\n      \', \'Comment\', \\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    ', '\\n\\n    ']]", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    , \'\\n\\n    \']']", "",.)
}

forval t = 1(1)270{

replace e_event_comment`t' = subinstr(e_event_comment`t',char(34), "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', 'Comment', '\\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"['[\'\\n          \', \'\\n      \', \'Comment\', \\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', '", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    ', '\\n\\n    ']]", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    , \'\\n\\n    \']']", "",.)

}



forval t = 1(1)17{
*d_disp_time_event1 
replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',char(34), "",.)
replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',"', ' ']]", "",.)

replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',"[['", "",.)

replace d_disp_time_event`t' = subinstr(d_disp_time_event`t'," ', '", "",.)

gen d_disp_date`t' = substr(d_disp_time_event`t',1,10)
gen d_disp_dated`t' = date(d_disp_date`t', "MDY")
gen d_disp_month`t' = mofd(d_disp_dated`t')
gen d_disp_event`t' = substr(d_disp_time_event`t',11,.)
drop d_disp_time_event`t'

*d_disp_charge_action1
replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',char(34), "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"['[\'\\n\\n      \', \'\\n        \', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n\\n      \', \'\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n      \', \'\\n\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n      \', \'\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

split d_disp_charge_action`t', parse(`"Query(function()"') generate(stub) limit(2)

drop stub2

replace d_disp_charge_action`t' = stub1

drop  stub1
}

order case_id roa_case_id court date_filed case_type case_status entitlement defendent attorney_name attorney_type c_* bs_* d_* e_*, first


saveold "$output_data/sag_roa_crim_repulled_parsed.dta", replace
